# This script reproduces Figure 7 and Table 2 #

rm(list=ls())
options(digits=4)
pkg <- c("PanelMatch", "ggplot2", "plm", 
         "matrixStats",
         "foreign", 
         "stringi", "stringdist",
         "DataCombine","lmtest", "multiwayvcov",
         "data.table",
         "dplyr", 
         "stargazer",
         "clubSandwich", "mediation",
         "doBy")
lapply(pkg, require, character.only = TRUE)

# load datasets
proprietary <- read.csv("proprietary.csv")
load("non_p.RData")
load("city_month.RData")
load("city_quarter.RData")
# merge 
d_sub1 <- merge(d_sub1, proprietary, by = c("county_id", "time"), all.x = T)

results_arrests <- list()
model_arrests <- plm(log_arrests_f1 ~ 
                       tour + agp_treat,
                     index = c("county_id", "month"),
                     effect = "twoways", model = "within",
                     data = d_sub1)
vcov_arrests <- clubSandwich::vcovCR(model_arrests, type="CR1S", cluster = d_sub1$county_id)
coeftest(model_arrests, vcov_arrests)
se_arrests <- coeftest(model_arrests, vcov_arrests)


lower_arrests_tour_95 <- se_arrests["tour", "Estimate"] - qnorm(.975) * se_arrests["tour", "Std. Error"]
upper_arrests_tour_95 <- se_arrests["tour", "Estimate"] + qnorm(.975) * se_arrests["tour", "Std. Error"]

lower_arrests_tour_90 <- se_arrests["tour", "Estimate"] - qnorm(.95) * se_arrests["tour", "Std. Error"]
upper_arrests_tour_90 <- se_arrests["tour", "Estimate"] + qnorm(.95) * se_arrests["tour", "Std. Error"]

est_arrests_tour <- se_arrests["tour", "Estimate"]

lower_arrests_agp_treat_95 <- se_arrests["agp_treat", "Estimate"] - qnorm(.975) * se_arrests["agp_treat", "Std. Error"]
upper_arrests_agp_treat_95 <- se_arrests["agp_treat", "Estimate"] + qnorm(.975) * se_arrests["agp_treat", "Std. Error"]

lower_arrests_agp_treat_90 <- se_arrests["agp_treat", "Estimate"] - qnorm(.95) * se_arrests["agp_treat", "Std. Error"]
upper_arrests_agp_treat_90 <- se_arrests["agp_treat", "Estimate"] + qnorm(.95) * se_arrests["agp_treat", "Std. Error"]

est_arrests_agp_treat <- se_arrests["agp_treat", "Estimate"]
results_arrests[[1]] <- list("lower_arrests_tour_95" = lower_arrests_tour_95,
                             "upper_arrests_tour_95" = upper_arrests_tour_95,
                             "lower_arrests_tour_90" = lower_arrests_tour_90,
                             "upper_arrests_tour_90" = upper_arrests_tour_90,
                             "est_arrests_tour" = est_arrests_tour, 
                             "lower_arrests_agp_treat_95" = lower_arrests_agp_treat_95,
                             "upper_arrests_agp_treat_95" = upper_arrests_agp_treat_95,
                             "lower_arrests_agp_treat_90" = lower_arrests_agp_treat_90,
                             "upper_arrests_agp_treat_90" = upper_arrests_agp_treat_90,
                             "est_arrests_agp_treat" = est_arrests_agp_treat)


model_arrests <- plm(log_arrests_f1 ~ 
                       tour + agp_treat,
                     
                     index = c("city_id", "month"),
                     effect = "twoways", model = "within",
                     data = d3)
vcov_arrests <- clubSandwich::vcovCR(model_arrests, type="CR1S", cluster = d3$city_id)
coeftest(model_arrests, vcov_arrests)
se_arrests <- coeftest(model_arrests, vcov_arrests)

lower_arrests_tour_95 <- se_arrests["tour", "Estimate"] - qnorm(.975) * se_arrests["tour", "Std. Error"]
upper_arrests_tour_95 <- se_arrests["tour", "Estimate"] + qnorm(.975) * se_arrests["tour", "Std. Error"]

lower_arrests_tour_90 <- se_arrests["tour", "Estimate"] - qnorm(.95) * se_arrests["tour", "Std. Error"]
upper_arrests_tour_90 <- se_arrests["tour", "Estimate"] + qnorm(.95) * se_arrests["tour", "Std. Error"]

est_arrests_tour <- se_arrests["tour", "Estimate"]

lower_arrests_agp_treat_95 <- se_arrests["agp_treat", "Estimate"] - qnorm(.975) * se_arrests["agp_treat", "Std. Error"]
upper_arrests_agp_treat_95 <- se_arrests["agp_treat", "Estimate"] + qnorm(.975) * se_arrests["agp_treat", "Std. Error"]

lower_arrests_agp_treat_90 <- se_arrests["agp_treat", "Estimate"] - qnorm(.95) * se_arrests["agp_treat", "Std. Error"]
upper_arrests_agp_treat_90 <- se_arrests["agp_treat", "Estimate"] + qnorm(.95) * se_arrests["agp_treat", "Std. Error"]

est_arrests_agp_treat <- se_arrests["agp_treat", "Estimate"]
results_arrests[[2]] <- list("lower_arrests_tour_95" = lower_arrests_tour_95,
                             "upper_arrests_tour_95" = upper_arrests_tour_95,
                             "lower_arrests_tour_90" = lower_arrests_tour_90,
                             "upper_arrests_tour_90" = upper_arrests_tour_90,
                             "est_arrests_tour" = est_arrests_tour, 
                             "lower_arrests_agp_treat_95" = lower_arrests_agp_treat_95,
                             "upper_arrests_agp_treat_95" = upper_arrests_agp_treat_95,
                             "lower_arrests_agp_treat_90" = lower_arrests_agp_treat_90,
                             "upper_arrests_agp_treat_90" = upper_arrests_agp_treat_90,
                             "est_arrests_agp_treat" = est_arrests_agp_treat)

model_arrests <- plm(lead(log_arrests,1) ~ 
                       tour + agp_treat,
                     index = c("city_id", "time"),
                     effect = "twoways", model = "within",
                     data = d2)
vcov_arrests <- clubSandwich::vcovCR(model_arrests, type="CR1S", cluster = d2$city_id)
coeftest(model_arrests, vcov_arrests)
se_arrests <- coeftest(model_arrests, vcov_arrests)

lower_arrests_tour_95 <- se_arrests["tour", "Estimate"] - qnorm(.975) * se_arrests["tour", "Std. Error"]
upper_arrests_tour_95 <- se_arrests["tour", "Estimate"] + qnorm(.975) * se_arrests["tour", "Std. Error"]

lower_arrests_tour_90 <- se_arrests["tour", "Estimate"] - qnorm(.95) * se_arrests["tour", "Std. Error"]
upper_arrests_tour_90 <- se_arrests["tour", "Estimate"] + qnorm(.95) * se_arrests["tour", "Std. Error"]

est_arrests_tour <- se_arrests["tour", "Estimate"]

lower_arrests_agp_treat_95 <- se_arrests["agp_treat", "Estimate"] - qnorm(.975) * se_arrests["agp_treat", "Std. Error"]
upper_arrests_agp_treat_95 <- se_arrests["agp_treat", "Estimate"] + qnorm(.975) * se_arrests["agp_treat", "Std. Error"]

lower_arrests_agp_treat_90 <- se_arrests["agp_treat", "Estimate"] - qnorm(.95) * se_arrests["agp_treat", "Std. Error"]
upper_arrests_agp_treat_90 <- se_arrests["agp_treat", "Estimate"] + qnorm(.95) * se_arrests["agp_treat", "Std. Error"]

est_arrests_agp_treat <- se_arrests["agp_treat", "Estimate"]
results_arrests[[3]] <- list("lower_arrests_tour_95" = lower_arrests_tour_95,
                             "upper_arrests_tour_95" = upper_arrests_tour_95,
                             "lower_arrests_tour_90" = lower_arrests_tour_90,
                             "upper_arrests_tour_90" = upper_arrests_tour_90,
                             "est_arrests_tour" = est_arrests_tour, 
                             "lower_arrests_agp_treat_95" = lower_arrests_agp_treat_95,
                             "upper_arrests_agp_treat_95" = upper_arrests_agp_treat_95,
                             "lower_arrests_agp_treat_90" = lower_arrests_agp_treat_90,
                             "upper_arrests_agp_treat_90" = upper_arrests_agp_treat_90,
                             "est_arrests_agp_treat" = est_arrests_agp_treat)


pdf(file = "output/Figure7.pdf", 
    width = 8, height = 8)
plot(1:3,  xlab = "", ylab = "", 
     pch = 19,
     ylim = c(-.1, .2),
     xlim = c(0.75, 3.5),
     xaxt = "n",
     main = "Comparing the Effectiveness of Provincial and Central Inspections")

for (i in 1:length(results_arrests)) {
  result <- results_arrests[[i]]
  lines(c(i,i), c(result$lower_arrests_agp_treat_95, result$upper_arrests_agp_treat_95))
  lines(c(i,i), c(result$lower_arrests_agp_treat_90, result$upper_arrests_agp_treat_90), lwd = 5, 
        col = "grey")
  points(i, result$est_arrests_agp_treat, pch = 25)  
  
  lines(c(i+.15,i+.15), c(result$lower_arrests_tour_95, result$upper_arrests_tour_95))
  lines(c(i+.15,i+.15), c(result$lower_arrests_tour_90, result$upper_arrests_tour_90), lwd = 5, 
        col = "grey")
  points(i+.15, result$est_arrests_tour, pch = 19)  
  
  
  abline(h = 0, lty = 3)
}
legend(x = 0.75, y = .20,  legend = c("Effect of Provincial DIC tour on arrests",
                                      "Effect of Central DIC tour on arrests"),
       y.intersp = 2,
       # x.intersp = 0.3,
       xjust = 0,
       pch = c(16, 25), pt.cex = 1,
       bty = "n", ncol = 1, cex = .9, bg = "white")
axis(side = 1, at = c(1.1, 2.1, 3.1), line = 1, tick = FALSE,
     labels = c("County-level \n monthly arrests",
                "Prefecture-level \n monthly arrests",
                "Prefecture-level \n quarter arrests"))
dev.off()

## Table 2 ##
model_arrests1 <- plm(log_arrests_f1 ~ 
                        tour + agp_treat,
                      index = c("county_id", "month"),
                      effect = "twoways", model = "within",
                      data = d_sub1)
vcov_arrests1 <- clubSandwich::vcovCR(model_arrests1, type="CR1S", cluster = d_sub1$county_id)
coeftest(model_arrests, vcov_arrests)
se_arrests1 <- coeftest(model_arrests1, vcov_arrests1)

model_arrests2 <- plm(log_arrests_f1 ~ 
                        tour + agp_treat,
                      index = c("city_id", "month"),
                      effect = "twoways", model = "within",
                      data = d3)
vcov_arrests2 <- clubSandwich::vcovCR(model_arrests2, type="CR1S", cluster = d3$city_id)
coeftest(model_arrests2, vcov_arrests2)
se_arrests2 <- coeftest(model_arrests2, vcov_arrests2)

model_arrests3 <- plm(lead(log_arrests,1) ~ 
                        tour + agp_treat,
                      index = c("city_id", "time"),
                      effect = "twoways", model = "within",
                      data = d2)
vcov_arrests3 <- clubSandwich::vcovCR(model_arrests3, type="CR1S", cluster = d3$city_id)
coeftest(model_arrests3, vcov_arrests3)
se_arrests3 <- coeftest(model_arrests3, vcov_arrests3)

sink("output/Table2.tex")
stargazer(model_arrests1, 
          model_arrests2,
          model_arrests3,
          font.size = "large", digits = 4, 
          type = "latex",
          dep.var.labels = c("Logged Arrests (t+1)",
                             "Logged Arrests (t+1)",
                             "Logged Arrests (t+1)"),
          column.labels = c("Logged Arrests (t+1)",
                            "Logged Arrests (t+1)",
                            "Logged Arrests (t+1)"),
          dep.var.labels.include = F,
          style = "qje", no.space = FALSE, keep.stat = c("n", "rsq"),
          omit = c("county_id", "time", "city_id", "month", "quarter"
          ),
          omit.yes.no = c("No", "Yes"),
          add.lines = list(c("Unit FE?", "Yes", "Yes", "Yes"),
                           c("Time FE?", "Yes", "Yes", "Yes")),
          covariate.labels = c("PDIC Inspection",
                               "PDIC Inspection"),
          star.cutoffs = c(.10, .05, .01), 
          se = list(se_arrests1[,2],
                    se_arrests2[,2],
                    se_arrests3[,2]),
          p = list(se_arrests1[,4],
                   se_arrests2[,4],
                   se_arrests3[,4]),
          header = F, notes = "robust standard errors clustered by county in parentheses", 
          float = F)
sink()
